GPS loggers are not the same for successful and failed breeders, so they have to be processed differently before being merged. Moreover, data from 2015 and 2018 have different formats and different constraints, so they are also processed separately. This file formats the data and compares the 2015 results obtained when trips are cleaned with different threshold parameters.

Bird data import for 2015

Import data for successful breeders

Loggers deployed on successful breeders are IgotU GPS. They give LOCAL TIME (GMT+5), which the script stores under the "UTC" time-zone label, and record locations every 5 minutes.

library(fields)
library(maptools)
library(maps)
library(stringr)
library(xlsx)
library(plotrix)
library(pastecs)
library(kableExtra)
library(plyr)
library(lme4)
library(afex)
library(simr)
source("R scripts/Functions.R")

# Colony position in decimal degrees; reference point for every distance-to-colony calculation
colony <- data.frame(Longitude = 77.52356, Latitude = -37.8545)
# Cut-off timestamp: failed-breeder locations recorded at or after it are dropped later in the script
date.max <- as.POSIXct("2015-12-22 12:00:00", format = "%F %H:%M:%S", tz = "UTC")

## Import data for successful birds (classical GPS)
success.2015 <- read.table("Data/RawData/ams_yna_2015-16_gps_success.txt",
                           header = TRUE, sep = "\t")
success.2015$Status <- factor("success")
# Build one timestamp per row from the Date (day/month/year) and Time columns
success.2015$DateTime <- with(
  success.2015,
  as.POSIXct(paste(strptime(Date, format = "%d/%m/%Y"), Time, sep = " "), tz = "UTC")
)
# Keep only the columns used in the rest of the script
success.2015 <- success.2015[, c("Logger.ID", "Status", "DateTime",
                                 "Longitude", "Latitude", "Speed", "Altitude")]

# Distance (km, since miles = FALSE) between every location and the colony
success.2015$Distmax <- as.vector(rdist.earth(success.2015[, c("Longitude", "Latitude")],
                                              colony[1, c("Longitude", "Latitude")],
                                              miles = FALSE))
names(success.2015)[1] <- "ID"   # first column is Logger.ID
success.2015$Distadj <- 0
success.2015$Difftime <- 0

# Distance (km) between each location and the previous one; row 1 keeps 0.
# seq_len(...)[-1] is empty for a one-row table, whereas 2:nrow() would count backwards (2, 1).
for (n in seq_len(nrow(success.2015))[-1]) {
  success.2015$Distadj[n] <- rdist.earth(success.2015[n, c("Longitude", "Latitude")],
                                         success.2015[n - 1, c("Longitude", "Latitude")],
                                         miles = FALSE)
}

# Time elapsed (minutes) between each location and the previous one; row 1 keeps 0.
# Same arithmetic as difftime(..., units = "mins"), done on the whole column at once.
if (nrow(success.2015) > 1) {
  success.2015$Difftime[-1] <- diff(as.numeric(success.2015$DateTime)) / 60
}

# Remove the time difference when changing individuals.
# A negative value is not a reliable marker of a change of bird (the next bird may simply
# start later than the previous one ended), so the first row of each ID run is reset too.
# NOTE(review): Distadj on those rows still spans two different birds - confirm whether
# define_trips() relies on it before resetting it as well.
success.2015$Difftime[which(
  success.2015$Difftime < 0 |
    c(FALSE, tail(as.character(success.2015$ID), -1) != head(as.character(success.2015$ID), -1))
)] <- 0

Import data for failed breeders

Loggers deployed on failed breeders are Ecotone GPS-UHF. They give time in GMT, so 5 h have to be added to obtain local time (local time = GMT+5). They record locations every 5-15 minutes, depending on weather conditions, and their data contain more gaps.

## Import data for failed birds (GPS-UHF)
fail.2015 <- read.table("Data/RawData/ams_yna_2015-16_gps_fail.txt",
                        header = TRUE, sep = "\t")
fail.2015$Status <- factor("fail")
# Assemble the timestamp from the separate year/month/day and hour/minute/second columns,
# then shift it by 5 h (logger time -> local time)
fail.2015$DateTime <- with(fail.2015, {
  day.part <- strptime(paste(Year, Month, Day, sep = "/"), format = "%Y/%m/%d")
  clock.part <- paste(Hour, Minute, Second, sep = ":")
  as.POSIXct(paste(day.part, clock.part, sep = " "), tz = "UTC") + 5 * 3600
})
# Keep only the columns used in the rest of the script
fail.2015 <- fail.2015[, c("Logger.ID", "Status", "DateTime", "Longitude",
                           "Latitude", "Speed", "Altitude", "In.range")]

# A missing coordinate on a row flagged In.range == 1 is replaced by the colony coordinate
fail.2015$Longitude[which(is.na(fail.2015$Longitude) & fail.2015$In.range == 1)] <- colony$Longitude
fail.2015$Latitude[which(is.na(fail.2015$Latitude) & fail.2015$In.range == 1)] <- colony$Latitude
# Rows that still have no latitude are discarded
fail.2015 <- fail.2015[!is.na(fail.2015$Latitude), ]

# Distance (km, since miles = FALSE) between every location and the colony
fail.2015$Distmax <- as.vector(rdist.earth(fail.2015[, c("Longitude", "Latitude")],
                                           colony[1, c("Longitude", "Latitude")],
                                           miles = FALSE))
fail.2015$Distmax[fail.2015$In.range == 1] <- 0   # in-range rows are forced to distance 0
names(fail.2015)[1] <- "ID"                       # first column is Logger.ID
fail.2015 <- fail.2015[, -8]                      # column 8 is In.range, no longer needed
fail.2015$Distadj <- 0
fail.2015$Difftime <- 0

# Distance (km) between each location and the previous one; row 1 keeps 0.
# seq_len(...)[-1] is empty for a one-row table, whereas 2:nrow() would count backwards (2, 1).
for (n in seq_len(nrow(fail.2015))[-1]) {
  fail.2015$Distadj[n] <- rdist.earth(fail.2015[n, c("Longitude", "Latitude")],
                                      fail.2015[n - 1, c("Longitude", "Latitude")],
                                      miles = FALSE)
}

# Time elapsed (minutes) between each location and the previous one; row 1 keeps 0.
# Same arithmetic as difftime(..., units = "mins"), done on the whole column at once.
if (nrow(fail.2015) > 1) {
  fail.2015$Difftime[-1] <- diff(as.numeric(fail.2015$DateTime)) / 60
}

# Remove the time difference when changing individuals.
# A negative value is not a reliable marker of a change of bird (the next bird may simply
# start later than the previous one ended), so the first row of each ID run is reset too.
# NOTE(review): Distadj on those rows still spans two different birds - confirm whether
# define_trips() relies on it before resetting it as well.
fail.2015$Difftime[which(
  fail.2015$Difftime < 0 |
    c(FALSE, tail(as.character(fail.2015$ID), -1) != head(as.character(fail.2015$ID), -1))
)] <- 0

# Keep only locations recorded strictly before date.max
fail.2015 <- subset(fail.2015, fail.2015$DateTime < date.max)

# remove HAR10 which has incomplete trips
fail.2015 <- subset(fail.2015, fail.2015$ID != "HAR10")

Merge data 2015, define trips and clean trips

Parameters to define a trip are based on distance to the colony, number of consecutive locations out of the colony and total duration of the trip.

# Stack successful and failed breeders into a single table
dataset.2015 <- rbind(success.2015, fail.2015)
# Remove duplicated points, i.e. the same bird recorded twice at the same date and time.
# The key must include ID: de-duplicating on DateTime alone deletes a valid location
# whenever two different birds happen to share a timestamp.
dataset.2015 <- dataset.2015[!duplicated(data.frame(
  ID = as.character(dataset.2015$ID),
  DateTime = as.numeric(dataset.2015$DateTime)
)), ]
dataset.2015$TravelNb <- "NA"    # placeholder string, filled in when trips are defined
dataset.2015$PathLength <- 0

## Parameters to define a trip in 2015
## NOTE(review): they are not passed to define_trips() explicitly, so it presumably
## reads them as globals - confirm in the sourced functions file
dist.thres <- 1      # distance threshold (in km)
last.dist <- 10      # distance threshold (in km) when having only one location between 2 trips
row.thres <- 12      # number of rows constituting a trip
dur.thres <- 120     # min duration of a trip

# Number the trips within each individual (define_trips() comes from the sourced file),
# then build a trip identifier that is unique across individuals: "<ID>.<TravelNb>"
dataset.2015 <- define_trips(dataset.2015)
dataset.2015$TravelID <- paste(dataset.2015$ID, dataset.2015$TravelNb, sep = ".")

# Store the raw (uncleaned) trips on disk
save(dataset.2015, file = "Data/NewlyCreatedData/raw_trips_2015.RData")

# Summarise the raw trips by status (helper from the sourced functions file).
# No time-gap threshold is applied here, only total duration and distance.
dist.max.all.trips.2015.status <- raw_trips_summary_status(dataset.2015)

# Summaries of the cleaned trips for decreasing time-gap thresholds.
# diff.thres is expressed in minutes (hours * 60) and is overwritten before each call.

# 10 h gaps
diff.thres <- 10 * 60
trips10h <- clean_trips_summary_status(dataset.2015, diff.thres)

# 8 h gaps
diff.thres <- 8 * 60
trips8h <- clean_trips_summary_status(dataset.2015, diff.thres)

# 6 h gaps
diff.thres <- 6 * 60
trips6h <- clean_trips_summary_status(dataset.2015, diff.thres)

# 5 h gaps
diff.thres <- 5 * 60
trips5h <- clean_trips_summary_status(dataset.2015, diff.thres)

# 4 h gaps
diff.thres <- 4 * 60
trips4h <- clean_trips_summary_status(dataset.2015, diff.thres)

# 3 h gaps
diff.thres <- 3 * 60
trips3h <- clean_trips_summary_status(dataset.2015, diff.thres)

Summary of raw and selected trips by status

The first table shows the summary of trip characteristics with raw data (without any time gap threshold). The others show the summary of trip characteristics with different time gap thresholds.

Summary of raw trips
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SETotPath MinTotPath MaxTotPath
success 8 24 340.4639 39.92386 155.2509 852.6335 52.7995 6.405953 17.417778 171.0675 1098.5590 139.3908 407.31755 3205.429
fail 12 28 470.1367 69.76282 28.6290 1229.8146 71.6843 10.229407 3.783333 174.5119 754.1347 112.3100 58.46149 2312.912
Summary of trips with gaps <10h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 11 23 432.1098 79.20488 28.6290 1229.8146 59.81384 10.510680 3.783333 163.0244 728.9346 132.1566 58.46149 2312.912
Summary of trips with gaps <8h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 11 23 432.1098 79.20488 28.6290 1229.8146 59.81384 10.510680 3.783333 163.0244 728.9346 132.1566 58.46149 2312.912
Summary of trips with gaps <6h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 11 23 432.1098 79.20488 28.6290 1229.8146 59.81384 10.510680 3.783333 163.0244 728.9346 132.1566 58.46149 2312.912
Summary of trips with gaps <5h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 10 21 438.8636 86.63242 28.6290 1229.8146 55.73952 10.303432 3.783333 155.9008 736.4904 140.1605 58.46149 2312.912
Summary of trips with gaps <4h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 8 17 353.4148 88.95200 28.6290 1229.8146 46.78624 10.553893 3.783333 137.9333 709.6711 161.7665 58.46149 2312.912
Summary of trips with gaps <3h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 8 23 336.4499 41.48781 155.2509 852.6335 53.51965 6.648387 17.417778 171.0675 1106.5136 145.3517 407.31755 3205.429
fail 7 15 248.4702 59.04111 28.6290 826.0564 34.81906 7.569682 3.783333 102.1197 545.5721 127.3710 58.46149 1443.117

Plots of trips by individuals

In the plots, all black dots represent trips/data that have been excluded. All colored trips are the selected ones.

Interpolation of clean trips

The proportions of time gaps longer than 16, 20 and 30 min are calculated to help choose the time step used for the interpolation.

# Locations of the trips kept with a 5 h time-gap threshold (threshold in minutes)
diff.thres <- 5 * 60
clean.trips.loc.2015 <- clean_trips_locations(dataset.2015, diff.thres)
clean.trips.loc.2015$TravelID <- paste(clean.trips.loc.2015$ID,
                                       clean.trips.loc.2015$TravelNb, sep = ".")


# Percentage of locations preceded by a gap > 16 min
sum(clean.trips.loc.2015$Difftime > 16, na.rm = TRUE) / nrow(clean.trips.loc.2015) * 100
## [1] 2.773826
# Percentage of locations preceded by a gap > 20 min
sum(clean.trips.loc.2015$Difftime > 20, na.rm = TRUE) / nrow(clean.trips.loc.2015) * 100
## [1] 2.498815
# Percentage of locations preceded by a gap > 30 min
sum(clean.trips.loc.2015$Difftime > 30, na.rm = TRUE) / nrow(clean.trips.loc.2015) * 100
## [1] 1.650071
## Interpolation
reso <- 15 * 60       # time resolution between 2 consecutive points in seconds

loc.interpolated.2015 <- NULL
id <- unique(clean.trips.loc.2015$ID)
# One interpolated data frame per trip, bound together once after the loops
# (growing a data frame with rbind() inside the loop copies it at every iteration)
trip.pieces <- list()

# interpolate data for each individual separately
for (x in seq_along(id)) {
  sub <- subset(clean.trips.loc.2015, clean.trips.loc.2015$ID == id[x])
  trip <- unique(sub$TravelNb)

  # interpolate data for each trip separately
  for (y in seq_along(trip)) {
    sub.trip <- subset(sub, sub$TravelNb == trip[y])
    # seconds elapsed since the first location of the trip
    sub.trip$TimeSinceOrigin <- as.numeric(difftime(sub.trip$DateTime,
                                                    sub.trip$DateTime[1],
                                                    units = "secs"))

    # Resampling of long and lat with regular time intervals
    n.steps <- round((max(sub.trip$TimeSinceOrigin) / reso), 0)
    sub.lat1 <- regul(x = sub.trip$TimeSinceOrigin, y = sub.trip$Latitude,
                      n = n.steps,
                      deltat = reso, methods = "linear", xmin = 0, units = "sec")
    sub.lon1 <- regul(x = sub.trip$TimeSinceOrigin, y = sub.trip$Longitude,
                      n = n.steps,
                      deltat = reso, methods = "linear", xmin = 0, units = "sec")

    # element 1 of the regul() result holds the regular time grid, element 2 the resampled series
    new.sub <- data.frame(Longitude = sub.lon1[[2]]$Series,
                          Latitude = sub.lat1[[2]]$Series,
                          Time = as.vector(sub.lon1[[1]]),
                          DateTime = sub.trip$DateTime[1] + as.vector(sub.lon1[[1]]),
                          TravelNb = trip[y])

    # Recreate a dataframe with individual and trip characteristics
    new.sub$ID <- id[x]
    new.sub$Status <- rep(unique(sub$Status), nrow(new.sub))
    new.sub$Distmax <- as.vector(rdist.earth(new.sub[, c("Longitude", "Latitude")],
                                             colony[1, c(1, 2)], miles = FALSE))
    # minutes since the previous interpolated point (0 for the first point of the trip);
    # explicit [-1] / [-nrow] indexing instead of 1:nrow(new.sub)-1, which only worked
    # because it evaluates to 0:(nrow-1) and index 0 is silently dropped
    new.sub$Difftime <- c(0, difftime(new.sub$DateTime[-1],
                                      new.sub$DateTime[-nrow(new.sub)], units = "mins"))
    new.sub$Distadj <- 0
    new.sub$TravelID <- paste(new.sub$ID, new.sub$TravelNb, sep = ".")

    # distance (km) to the previous interpolated point; skipped when the trip has a single point
    for (z in seq_len(nrow(new.sub))[-1]) {
      new.sub$Distadj[z] <- rdist.earth(new.sub[z, c("Longitude", "Latitude")],
                                        new.sub[z - 1, c("Longitude", "Latitude")],
                                        miles = FALSE)
    }

    trip.pieces[[length(trip.pieces) + 1]] <- new.sub
  }
}

# all interpolated trips stacked in processing order (NULL when there is no trip)
new.trip <- do.call(rbind, trip.pieces)

# create a new global dataset with interpolated data
loc.interpolated.2015 <- new.trip[, c("ID", "Status", "TravelNb", "TravelID", "DateTime",
                                      "Longitude", "Latitude",
                                      "Distmax", "Difftime", "Distadj")]


save(loc.interpolated.2015, file = "Data/NewlyCreatedData/clean_interp_loc_2015.RData")

Statistics

Linear mixed models are applied to the raw trips to test whether the trip variables differ between failed and successful breeders.

# Summary of the raw trips by individual (helper from the sourced functions file)
raw.trips.2015 <- raw_trips_summary_ind(dataset.2015)

# One mixed model per trip variable: square-root-transformed response,
# Status as fixed effect and a random intercept for each individual (ID)
maxdist <- lmer(sqrt(Distmax) ~ Status + (1 | ID), data = raw.trips.2015)
dur <- lmer(sqrt(TripDur) ~ Status + (1 | ID), data = raw.trips.2015)
totdist <- lmer(sqrt(TotalPath) ~ Status + (1 | ID), data = raw.trips.2015)

# Test of the Status effect in each model
anova(maxdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## Status 40.265  40.265     1 16.802  1.2137 0.2861
anova(dur)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 7.3945  7.3945     1 14.16  1.1262 0.3063
anova(totdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF  DenDF F value  Pr(>F)  
## Status 343.01  343.01     1 16.058  3.7839 0.06947 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1